## ----------------------------------------------
## Sample solutions from 200-precinct florida map
## ----------------------------------------------
library(redist)
library(maptools)
library(igraph)
library(tidyverse)
library(spdep)

## -----
## Setup
## -----
i <- as.numeric(Sys.getenv("SLURM_ARRAY_TASK_ID"))

fl_map <- readShapePoly("../../data/fl/FL.shp")

nodes <- strsplit(readLines("../../data/largemap_enum_sample/map_sub_nodes.dat")," ")[[1]]
## nodes <- strsplit(readLines("../../data/big_validation_map/fl_sub_nodes.dat")," ")[[1]]

## First, subset down to right map
fl_sub <- fl_map[which(rownames(fl_map@data) %in% nodes),]

## Then, reorder nodes to be correct order to match - otherwise, won't
## line up properly with original map
fl_sub <- fl_sub[match(nodes, rownames(fl_sub@data)),]

## Convert factor columns to characters, because this particular data is messy
indx <- sapply(fl_sub@data, is.factor)
fl_sub@data[indx] <- lapply(
    fl_sub@data[indx], function(x) as.numeric(as.character(x))
)

## ----------------------------
## Run enumpart, dump solutions
## ----------------------------
system(paste0("../../enumpart_private/enumpart ../../data/largemap_enum_sample/map_sub.dat ../../data/largemap_enum_sample/zdd.txt -readfile -sample 10000000 > /n/imai_lab/bfifield/enumeration/largemap_enum_sample/solutions_", i, ".dat"))

## system(paste0("../../enumpart_private/enumpart ../../data/big_validation_map/fl_sub_ordered_", i, ".dat -k 2 -sample 10000000 > /n/imai_lab/bfifield/enumeration/largemap_enum_sample/solutions_", i, ".dat"))

## -----------------------------
## Load in solutions, preprocess
## -----------------------------
sols_all <- readLines(paste0("/n/imai_lab/bfifield/enumeration/largemap_enum_sample/solutions_", i, ".dat"))

## Create edgelist
el <- strsplit(readLines("../../data/largemap_enum_sample/map_sub.dat"), split = " ")
## el <- strsplit(readLines(paste0("../../data/big_validation_map/fl_sub_", i, ".dat")), split = " ")
el <- apply(do.call(rbind, el), 2, as.numeric)

## All solutions to matrix
ndists <- 2
sols_out_all <- lapply(1:length(sols_all), function(x){
    sol_split <- as.numeric(strsplit(sols_all[x], split = " ")[[1]])
    el_sub <- el[sol_split,]
    comps_out <- components(graph_from_edgelist(el_sub, directed = FALSE))
    if(comps_out$no < ndists){
        max_num <- max(comps_out$membership)
        comps_out <- c(comps_out$membership, (max_num + 1):ndists)
    }else{
        comps_out <- comps_out$membership
    }
    return(comps_out)
})
sols_out_all <- do.call(cbind, sols_out_all)

## Calculate dissimilarity index and parity
seg_truth_sample <- unlist(lapply(1:ncol(sols_out_all), function(x){
    T <- sum(fl_sub@data$pop)
    P <- sum(fl_sub@data$mccain) / T
    t_i <- tapply(fl_sub@data$pop, sols_out_all[,x], sum)
    p_i <- tapply(fl_sub@data$mccain, sols_out_all[,x], sum) / t_i
    return(sum(t_i * abs(p_i - P) / (2 * T * P * (1 - P)), na.rm = TRUE))
}))

pop_dist_sample <- unlist(lapply(1:ncol(sols_out_all), function(x){
    distpop <- tapply(fl_sub@data$pop, sols_out_all[,x], sum)
    parpop <- sum(distpop) / length(distpop)
    return(max(abs(distpop / parpop - 1)))
}))

seg_truth_sample <- data.frame(seg = seg_truth_sample, par = pop_dist_sample)
write_csv(seg_truth_sample, path = paste0("../../data/largemap_enum_sample/dissimilarity_", i, ".csv"))
